A look at stock market indexes, specifically the performance relative to historical trends.
import pandas as pd
import numpy as np
from io import StringIO
from urllib.error import HTTPError
import altair as alt
import yfinance as yf
from os import environ
try:
# for local execution
apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
apiKeyFromFile = None
pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
def getSeries(series="", apiKey=None, description=None):
fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt"
resp = urlopen(fetchCommand.format(s=series, k=apiKey))
zipfile = ZipFile(BytesIO(resp.read()))
filesInZip = zipfile.namelist()
data = zipfile.open(filesInZip[1])
if description is None:
description = series
df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
names=["date", description, "rt_start", "rt_end"], na_values=".")
df['date'] = pd.to_datetime(df.date)
return df.set_index("date")
dji = yf.Ticker("^DJI")
# get historical market data
df_dji = dji.history(period="max")
subset = df_dji[df_dji.index <= '1995-12-31']
y = subset.Close.values
x = [n for n, v in enumerate(subset.index.values)]
z = np.polyfit(x, y, 1)
# a * 1 * x^1 + b
trend = np.add(np.multiply(z[1], 1), np.multiply(range(df_dji.shape[0]), z[0]))
alt.data_transformers.disable_max_rows()
(
alt.Chart(pd.DataFrame(index=df_dji.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(df_dji.reset_index()).mark_line(color='black').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('Close:Q', axis=alt.Axis(title='Index Value')),
tooltip=[alt.Tooltip('Date:T'), alt.Tooltip('Close:Q')]
)
).properties(
title='US Dow Jones Industrial',
width=750,
height=400,
background='white'
)
sp500 = yf.Ticker("^GSPC")
# get historical market data
df_sp500 = sp500.history(period="max")
#df_sp500.head()
init = df_sp500[(df_sp500.index > '1979-12-31')]
subset = init[(init.index <= '1995-12-31')]
y = subset.Close.values
x = range(subset.shape[0])
z = np.polyfit(x, y, 1)
trend = np.add(np.multiply(z[1], 1), np.multiply(range(init.shape[0]), z[0]))
(
alt.Chart(pd.DataFrame(index=init.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(init.reset_index()).mark_line(color='black').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('Close:Q', axis=alt.Axis(title='Index Value')),
tooltip=[alt.Tooltip('Date:T'), alt.Tooltip('Close:Q')]
)
).properties(
title='US S&P 500',
width=750,
height=400,
background='white'
)
rut = yf.Ticker("^RUT")
# get historical market data
df_rut = rut.history(period="max")
#df_sp500.head()
init_r = df_rut[(df_rut.index > '1979-12-31')]
subset_r = init_r[(init_r.index <= '1995-12-31')]
y_r = subset_r.Close.values
x_r = range(subset_r.shape[0])
z_r = np.polyfit(x_r, y_r, 1)
trend_r = np.add(np.multiply(z_r[1], 1), np.multiply(range(init_r.shape[0]), z[0]))
(
alt.Chart(pd.DataFrame(index=init_r.index, data=trend_r, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(init_r.reset_index()).mark_line(color='black').encode(
alt.X('Date:T', axis=alt.Axis(title='')),
alt.Y('Close:Q', axis=alt.Axis(title='Index Value')),
tooltip=[alt.Tooltip('Date:T'), alt.Tooltip('Close:Q')]
)
).properties(
title='US Russell 2000',
width=750,
height=400,
background='white'
)
# CPIAUCSL
df_cpi = getSeries("CPIAUCSL", apiKey=apiKey, description="CPI")
returns = (df_sp500['Close'].resample("1M").last() / df_cpi.CPI.resample("1M").last()).dropna().reset_index()
returns.columns = ['Date', 'Adj-close']
alt.Chart(returns.reset_index()).mark_line().encode(
alt.X('Date:T'),
alt.Y('Adj-close:Q')
).properties(
title='CPI Adjusted SP500',
width=700,
height=450
)
# Compute CAGR for each period
years = 20
totals = []
for i in range(0, int(returns.shape[0]-(years*12)-1)):
totals.append(dict({
"start": returns.iloc[i, 0],
"return": ((returns.iloc[(years*12)+(i), 1] / returns.iloc[i, 1])**(1/years) - 1) * 100
}))
df_totals = pd.DataFrame.from_dict(totals)
alt.Chart(df_totals).mark_line().encode(
alt.X('start:T', axis=alt.Axis(title='')),
alt.Y('return:Q', axis=alt.Axis(title=f'Total {years}yr Return [CAGR]'))
).properties(
title='Inflation Adjusted S&P 500 Returns (CAGR) vs Start Date of Investment',
width=700,
height=450
)
hist = alt.Chart(df_totals).transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total',
decade='toString(floor(year(datum.start) / 10)) + "0\'s"'
).mark_bar(opacity=0.6).encode(
alt.X("return:Q", bin=True, axis=alt.Axis(title=f'{years}yr CAGR Return [%]')),
alt.Y('sum(pct):Q', axis=alt.Axis(title='Likelihood')),
tooltip=[alt.Tooltip('sum(pct):Q', title='Likelihood of return being in this range', format=',.02f')]
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450
)
cumu = alt.Chart(df_totals).mark_line(color='black', interpolate='step-after').transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total'
).transform_window(
frame=[None, 0],
sort=[{"field": "return"}],
cumu='sum(pct)'
).encode(
alt.X("return:Q"),
alt.Y('cumu:Q', axis=alt.Axis(title='Cumulative Likelihood')),
tooltip=[alt.Tooltip('cumu:Q', title='Likelihood of return being less than or equal', format=',.02f'),
alt.Tooltip('return:Q', title=' CAGR', format=',.02f')]
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450,
)
c = (hist + cumu).resolve_scale(y='independent').properties(background='white')
c.save('sp500-returns-and-performance.png')
c.display()
histf = alt.Chart(df_totals).transform_joinaggregate(
total='count(*)'
).transform_calculate(
pct='1 / datum.total',
decade='toString(floor(year(datum.start) / 10)) + "0\'s"'
).mark_bar(opacity=0.6).encode(
alt.X("return:Q", bin=True, axis=alt.Axis(title='40yr CAGR Return [%]')),
alt.Y('sum(pct):Q', axis=alt.Axis(title='Likelihood')),
alt.Color("decade:N"),
alt.Row("decade:N")
).properties(
title='Distribution of Inflation Adjusted S&P 500 Returns (CAGR)',
width=700,
height=450
)
histf.display()
df_spex = pd.read_html("http://www.proshares.com/funds/spxt_daily_holdings.html")[0]
#df_spex.head()
%%time
urlFor = lambda t: "https://stockrow.com/api/companies/{}/financials.xlsx?dimension=Q§ion=Balance%20Sheet&sort=desc".format(t)
def dfFor(t):
try:
tmp = pd.read_excel(urlFor(t))
tmp.columns = list(map(lambda v: v if isinstance(v, str) else "{}-{}-{}".format(v.year, v.month, v.day), tmp.columns.to_list()))
tmp = tmp.dropna().set_index("Unnamed: 0").T
tmp['Ticker'] = [t]*tmp.shape[0]
return tmp
except HTTPError:
print("{} failed".format(t))
return None
except KeyError:
print("{} failed".format(t))
return None
map_sp500 = map(dfFor, df_spex['Ticker Symbol'].values)
all_df = pd.concat(map_sp500)
#all_df.head()
all_df['dt'] = pd.to_datetime(all_df.index, format="%Y-%m-%d")
#all_df['dt'].head()
#all_df.dtypes
alt.data_transformers.disable_max_rows()
def violinFor(df, metric='Total Debt'):
return alt.Chart(df[['dt', metric]].dropna()).transform_calculate(
yr='year(datum.dt)'
).transform_density(
metric,
as_=['{}'.format(metric.replace(" ", "_")), 'density'],
extent=[df[metric].quantile(0.1), df[metric].quantile(0.9)],
maxsteps=1000,
groupby=['yr']
).mark_area(orient='horizontal').encode(
alt.Y('{}:Q'.format(metric.replace(" ", "_"))),
alt.Color('yr:N', title='Year'),
x=alt.X(
'density:Q',
stack='center',
impute=None,
title=None,
axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True),
),
column=alt.Column(
'yr:N',
header=alt.Header(
titleOrient='bottom',
labelOrient='bottom',
labelPadding=0,
),
)
).properties(
title="Changing {} Profile of S&P 500 (ex tech)".format(metric),
width=100
)
violinFor(all_df)
violinFor(all_df, metric='Treasury Stock')
charts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
charts.append(violinFor(all_df, metric=c))
#alt.vconcat(*charts)
def chartFor(df, metric='Long Term Debt (Total)', color='steelblue'):
summed = df.copy().reset_index().groupby("dt").sum().resample('1Q').mean()
#yoy = summed.sort_index().dropna().pct_change(12).dropna().apply(lambda v: v * 100)
return alt.Chart(summed.sort_index().reset_index()[:-1]).mark_line(color=color).encode(
alt.X("dt:T", axis=alt.Axis(title='', format="%b %Y")),
alt.Y("{}:Q".format(metric), axis=alt.Axis(title="{} [USD]".format(metric))),
tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip("{}:Q".format(metric))]
).properties(
title="{} trend for S&P 500 (Ex Technology)".format(metric),
width=700,
height=450
)
chartFor(all_df[all_df['dt'] > '2012-01-01'])
lcharts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
lcharts.append(chartFor(all_df[all_df['dt'] > '2012-01-01'].dropna(how='all'), metric=c))
alt.vconcat(*lcharts)
def yoyChartFor(df, metric='Long Term Debt (Total)'):
summed = df.copy().reset_index().groupby("dt").sum().resample('1Q').mean()
yoy = summed.sort_index().dropna().pct_change(4).dropna().apply(lambda v: v * 100)
return alt.Chart(yoy.sort_index().reset_index()[:-1]).mark_bar(color='royalblue').encode(
alt.X("dt:T", axis=alt.Axis(title='', format="%b %Y")),
alt.Y("{}:Q".format(metric), axis=alt.Axis(title="{} Growth [year-over-year %]".format(metric))),
tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip("{}:Q".format(metric))]
).properties(
title="{} growth for S&P 500 (Ex Technology)".format(metric),
width=700,
height=450
)
yoyChartFor(all_df[all_df['dt'] > '2012-01-01'])
ycharts = []
for c in all_df.columns:
if c in ['dt', 'Ticker']:
continue
ycharts.append(yoyChartFor(all_df[all_df['dt'] > '2012-01-01'].dropna(how='all'), metric=c))
alt.vconcat(*ycharts)
yoyChartFor(all_df[all_df['dt'] > '2012-01-01'], 'Pension and Post-Retirement Liabilities')
yoyChartFor(all_df[all_df['dt'] > '2012-01-01'], 'Liabilities (Preferred Stock)')
yoyChartFor(all_df[all_df['dt'] > '2012-01-01'], 'Shares (Common)')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Shares (Common)')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Goodwill and Intangible Assets (Total)')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Dividends Payable')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Property, Plant, Equpment (Net)')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Receivables')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Inventory')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Receivables', color='green') +\
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Accounts Payable', color='orange')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Total Assets', color='green') +\
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Total liabilities', color='orange')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Treasury Stock')
chartFor(all_df[all_df['dt'] > '2012-01-01'], 'Cash and Short Term Investments')
Exported from analysis/sp500-returns-and-performance.ipynb committed by GitHub Action on Sat Apr 11 01:01:22 2020 revision 2, 619d6c1